* ---------------------------------------------------------------
* Session setup: close any open log, empty memory, move to the
* project root, define the folder globals and open this run's log.
* NOTE(review): data_dir is defined here but the visible script
* reads its inputs through relative "use\..." paths instead.
* ---------------------------------------------------------------
capture log close
clear
set more off
cd "P:\2018\186"

global data_dir "P:\2018\186/use"
global save_dir "P:\2018\186/Martin/Decompositions_SE_extLLSMM/Grandparents"

log using ${save_dir}/SE_iv_parent_incomes_v2.log, replace

******************************************
/* USE 5TH INCOME AS ENDOGENOUS INCOME */

* Load the ranked MAIN SAMPLE, restricted to panel years 1985-2019 and to
* birth cohorts 1952-1994. The option that discards the data in memory when
* loading is clear; replace is not an option of the use command.
use *idnr *earn* *yob *obsy *max* *edu* *emp* year woman if year>=1985 & year<=2019 & yob>=1952 & yob<=1994 using "use\swepanel_zeros", clear


* Rename variables to the names used in the rest of this script
* (pm_, m_ prefixes: mother; pf_, f_ prefixes: father; no prefix: child)

rename pmearn pm_LAB
rename pfearn pf_LAB
rename pearn pLAB

rename memp m_emplavg
rename femp f_emplavg
rename emp employ

rename medu m_schmax
rename fedu f_schmax
rename edu schmax

rename idnr newid
rename midnr m_newid
rename fidnr f_newid

* Age variables and normalization
gen AGEC=year-yob		// child age in the observation year
gen AGEC1=AGEC-40		// child age centred at 40
gen f_LABAGE_1=fobsy-fyob	// father: fobsy minus birth year
gen m_LABAGE_1=mobsy-myob	// mother: mobsy minus birth year

* Powers 2 to 4 of the age terms, so that the wildcards AGEC?, f_LABAGE_? and
* m_LABAGE_? each pick up a quartic polynomial in the regressions below
forval i=2/4 {
	gen AGEC`i'=AGEC1^`i'
	gen f_LABAGE_`i'=f_LABAGE_1^`i'
	gen m_LABAGE_`i'=m_LABAGE_1^`i'
}

* Add the annual incomes of parents
* (the using file is keyed on idnr, so the id is renamed for the merge and
*  renamed back afterwards; unmatched master observations are kept)
rename newid idnr
merge m:1 idnr using "use\pmf_ann_inc_cs_sample_usswe", keep(match master) nogen
rename idnr newid

rename   pm_LAB_16 pm_LAB_a16
rename   pf_LAB_16 pf_LAB_a16

*Age of parent at child age 16 observed income (endogenous income for IV approach)
rename f_LABAGE_16 f_LABAGE16_1
rename m_LABAGE_16 m_LABAGE16_1
* capture: the run continues even if these variables are not all present
cap drop m_LABAGE_12 m_LABAGE_21  f_LABAGE_12 f_LABAGE_21

* Powers 2 to 4 of the parental age at the endogenous income measure
forval i=2/4 {
	gen f_LABAGE16_`i'=f_LABAGE16_1^`i'
	gen m_LABAGE16_`i'=m_LABAGE16_1^`i'
}

* MOTHER's SAMPLE: require income, age, schooling and employment
keep if pm_LAB!=. & m_LABAGE_1!=.
keep if m_schmax!=.
keep if m_emplavg!=.

* FATHER's SAMPLE: require income, age, schooling and employment
keep if pf_LAB!=. & f_LABAGE_1!=.
keep if f_schmax!=.
keep if f_emplavg!=.


*Variable for Early/late time periods (years 1996-2007 stay missing)
gen period=.
replace period=1 if inrange(year,1985,1995)
replace period=2 if inrange(year,2008,2019)


*Check age at each parental income measure
tabstat m_LABAGE16_1 , by(period) stat(n mean sd min max)
tabstat f_LABAGE16_1 , by(period) stat(n mean sd min max)

* Keep the prepared sample in a tempfile; each specification reloads it
tempfile rankedsample
	save `rankedsample'
	
*************************************************
* SPECIFICATION 1: ONE PARENT, ONLY INCOME  
*************************************************
use `rankedsample', clear

* Period windows: word t of each list is the first or last year of period t
* (t=1 early period, t=2 late period)
local first_years 1985 2008
local last_years  1995 2019

* Mothers (m) are handled first, then fathers (f)
foreach p in m f {

	* The child residuals depend on which parent's age terms are used, so any
	* version left over from the previous parent is removed before re-creating it
	capture drop pLAB_r
	capture drop pLAB1_r

	* RESIDUALIZE average measure for child and parent for IRP with average parent income
	foreach var in pLAB p`p'_LAB {
		quietly regress `var' `p'_LABAGE_? AGEC? i.year
		predict `var'_r, resid
	}

	* RESIDUALIZE annual measures for the parent -- use endogenous measure age
	foreach var in p`p'_LAB_12 p`p'_LAB_a16 p`p'_LAB_21 {
		quietly regress `var' `p'_LABAGE16_? AGEC? i.year
		predict `var'_r, resid
	}

	* RESIDUALIZE child income for IV approach (add "1" to variable name)
	foreach var in pLAB {
		quietly regress `var' `p'_LABAGE16_? AGEC? i.year
		predict `var'1_r, resid
	}

	forvalues t = 1/2 {
		local yrmin : word `t' of `first_years'
		local yrmax : word `t' of `last_years'

		preserve
		keep if inrange(year,`yrmin',`yrmax')

		capture noisily matrix drop B`t'`p' N`t'`p'

		* IRP with parent average income: first column of the B and N matrices
		regress pLAB_r p`p'_LAB_r, cluster(newid)
		matrix B`t'`p' = _b[p`p'_LAB_r]
		matrix N`t'`p' = e(N)

		* IRPs using one parent income rank to instrument for another:
		* one further column per instrument (12, then 21)
		foreach a in 12 21 {
			ivregress 2sls pLAB1_r (p`p'_LAB_a16_r = p`p'_LAB_`a'_r ), cluster(newid)
			matrix B`t'`p' = B`t'`p', _b[p`p'_LAB_a16_r]
			matrix N`t'`p' = N`t'`p', e(N)
		}

		restore
	}
}


**** Save results to dataset

* Start from a single-row dataset; svmat adds one variable per matrix column
clear
set obs 1
generate temp = 1
forvalues t = 1/2 {
	foreach stat in B N {
		foreach p in m f {
			svmat `stat'`t'`p'
		}
	}
}

* Wide to long over the matrix column index (1, 2, 3)
reshape long B1m B2m N1m N2m B1f B2f N1f N2f, i(temp) j(agez)

* Column 1 holds the average-income estimate, columns 2 and 3 the estimates
* that use the 12 and 21 income measures as instruments
recode agez (1=0) (2=12) (3=21)

label define agez_vals 0 "avg" 12 "Child age 12 (up to 15)" 21 "Child age 21 (down to 18)"
label values agez agez_vals

* Wide to long over the parent suffix (m or f)
reshape long B1 N1 B2 N2, i(temp agez) j(parent) string
drop temp
sort parent agez

label variable B1 "Early Period IRPs"
label variable B2 "Late Period IRPs"

list

generate spec = 1

quietly compress
save ${save_dir}/SE_irps_iv_v2.dta, replace


 
 
*************************************************
* SPECIFICATION 2: ONE PARENT, INCOME & HC
*************************************************
use `rankedsample', clear

* Period windows: word t of each list is the first or last year of period t
* (t=1 early period, t=2 late period)
local first_years 1985 2008
local last_years  1995 2019

* Mothers (m) are handled first, then fathers (f)
foreach p in m f {

	* The child residuals depend on which parent's age terms are used, so any
	* version left over from the previous parent is removed before re-creating it
	capture drop pLAB_r
	capture drop pLAB1_r

	* RESIDUALIZE average measure for child and parent (income and schooling)
	* for IRP with average parent income
	foreach var in pLAB p`p'_LAB `p'_schmax {
		quietly regress `var' `p'_LABAGE_? AGEC? i.year
		predict `var'_r, resid
	}

	* RESIDUALIZE annual measures for the parent -- use endogenous measure age
	foreach var in p`p'_LAB_12 p`p'_LAB_a16 p`p'_LAB_21 {
		quietly regress `var' `p'_LABAGE16_? AGEC? i.year
		predict `var'_r, resid
	}

	* RESIDUALIZE child income and parent schooling for IV approach
	* (add "1" to variable name)
	foreach var in pLAB `p'_schmax {
		quietly regress `var' `p'_LABAGE16_? AGEC? i.year
		predict `var'1_r, resid
	}

	forvalues t = 1/2 {
		local yrmin : word `t' of `first_years'
		local yrmax : word `t' of `last_years'

		preserve
		keep if inrange(year,`yrmin',`yrmax')

		capture noisily matrix drop B`t'`p' H`t'`p' N`t'`p'

		* IRP with parent average income: first column of the B, H and N matrices
		regress pLAB_r p`p'_LAB_r `p'_schmax_r, cluster(newid)
		matrix B`t'`p' = _b[p`p'_LAB_r]
		matrix H`t'`p' = _b[`p'_schmax_r]
		matrix N`t'`p' = e(N)

		* IRPs using one parent income rank to instrument for another:
		* one further column per instrument (12, then 21)
		foreach a in 12 21 {
			ivregress 2sls pLAB1_r (p`p'_LAB_a16_r = p`p'_LAB_`a'_r ) `p'_schmax1_r, cluster(newid)
			matrix B`t'`p' = B`t'`p', _b[p`p'_LAB_a16_r]
			matrix H`t'`p' = H`t'`p', _b[`p'_schmax1_r]
			matrix N`t'`p' = N`t'`p', e(N)
		}

		restore
	}
}

**** Save results to dataset

* Start from a single-row dataset; svmat adds one variable per matrix column
clear
set obs 1
generate temp = 1
forvalues t = 1/2 {
	foreach stat in B H N {
		foreach p in m f {
			svmat `stat'`t'`p'
		}
	}
}

* Wide to long over the matrix column index (1, 2, 3)
reshape long B1m B2m H1m H2m N1m N2m B1f B2f H1f H2f N1f N2f, i(temp) j(agez)

* Column 1 holds the average-income estimate, columns 2 and 3 the estimates
* that use the 12 and 21 income measures as instruments
recode agez (1=0) (2=12) (3=21)

label define agez_vals 0 "avg" 12 "Child age 12 (up to 15)" 21 "Child age 21 (down to 18)"
label values agez agez_vals

* Wide to long over the parent suffix (m or f)
reshape long B1 H1 N1 B2 H2 N2, i(temp agez) j(parent) string
drop temp
sort parent agez

label variable B1 "Early Period IRPs"
label variable B2 "Late Period IRPs"
label variable H1 "Early Period coef on HC"
label variable H2 "Late Period coef on HC"

list agez parent B1 H1 N1 B2 H2 N2

generate spec = 2

* Stack on top of the results already saved, then overwrite the file
quietly compress
append using ${save_dir}/SE_irps_iv_v2.dta
save ${save_dir}/SE_irps_iv_v2.dta , replace


*************************************************
* SPECIFICATION 3: BOTH PARENTS, INCOME & HC 
*************************************************
use `rankedsample', clear

* Control sets: age polynomials of both parents and of the child, plus year
* dummies. The first uses the parental age terms paired with the average
* income measure, the second those paired with the endogenous annual measure.
local ctrl_avg m_LABAGE_? f_LABAGE_? AGEC? i.year
local ctrl_a16 m_LABAGE16_? f_LABAGE16_? AGEC? i.year

* RESIDUALIZE average measure for child and parents for IRP with average parent income
foreach var in pLAB pm_LAB m_schmax pf_LAB f_schmax {
	quietly regress `var' `ctrl_avg'
	predict `var'_r, resid
}

* RESIDUALIZE annual measures for parents -- use endogenous measure age
foreach var in pm_LAB_12 pm_LAB_a16  pm_LAB_21 pf_LAB_12  pf_LAB_a16  pf_LAB_21 {
	quietly regress `var' `ctrl_a16'
	predict `var'_r, resid
}

* RESIDUALIZE child income and parental schooling for IV approach
* (add "1" to variable name)
foreach var in pLAB m_schmax f_schmax {
	quietly regress `var' `ctrl_a16'
	predict `var'1_r, resid
}

* Period windows: word t of each list is the first or last year of period t
* (t=1 early period, t=2 late period)
local first_years 1985 2008
local last_years  1995 2019

* Matrices cleared before each period is estimated; the late-period list is
* shorter than the early-period one, and every matrix is reassigned below
local stale1 B1m H1m N1m B1f H1f
local stale2 B2m H2m N2m

forvalues t = 1/2 {
	local yrmin : word `t' of `first_years'
	local yrmax : word `t' of `last_years'

	preserve
	keep if inrange(year,`yrmin',`yrmax')

	capture noisily matrix drop `stale`t''

	* IRP with parent average income: first column of each matrix.
	* Only one sample-size matrix (suffix m) is stored for this joint regression.
	regress pLAB_r pm_LAB_r m_schmax_r pf_LAB_r f_schmax_r , cluster(newid)
	matrix B`t'm = _b[pm_LAB_r]
	matrix H`t'm = _b[m_schmax_r]
	matrix B`t'f = _b[pf_LAB_r]
	matrix H`t'f = _b[f_schmax_r]
	matrix N`t'm = e(N)

	* IRPs using one parent income rank to instrument for another:
	* one further column per instrument pair (12, then 21)
	foreach a in 12 21 {
		ivregress 2sls pLAB1_r (pm_LAB_a16_r pf_LAB_a16_r = pm_LAB_`a'_r pf_LAB_`a'_r ) m_schmax1_r f_schmax1_r, cluster(newid)
		matrix B`t'm = B`t'm, _b[pm_LAB_a16_r]
		matrix H`t'm = H`t'm, _b[m_schmax1_r]
		matrix B`t'f = B`t'f, _b[pf_LAB_a16_r]
		matrix H`t'f = H`t'f, _b[f_schmax1_r]
		matrix N`t'm = N`t'm, e(N)
	}

	restore
}

 
 
 
 
 
 
 
 
 

**** Save results to dataset

* Start from a single-row dataset; svmat adds one variable per matrix column
clear
set obs 1
gen temp=1
forv t=1/2 {
	* Specification 3 estimates both parents in one regression and stores the
	* sample size only under the m suffix. The reshape below also lists the f
	* stubs, so the same counts are copied to the f suffix; this overwrites any
	* N matrices with that name left in memory by an earlier specification and
	* gives the father rows their sample size.
	matrix N`t'f = N`t'm

	svmat B`t'm
	svmat B`t'f
	svmat H`t'm
	svmat H`t'f
	svmat N`t'm
	svmat N`t'f
}

* Wide to long over the matrix column index (1, 2, 3)
reshape long B1m B2m H1m H2m N1m N2m B1f B2f H1f H2f N1f N2f, i(temp) j(agez)

* Column 1 holds the average-income estimate, columns 2 and 3 the estimates
* that use the 12 and 21 income measures as instruments
replace agez=0  if agez==1
replace agez=12 if agez==2
replace agez=21 if agez==3

label define agez_vals 0 "avg" 12 "Child age 12 (up to 15)" 21 "Child age 21 (down to 18)"
label values agez agez_vals

* Wide to long over the parent suffix (m or f)
reshape long B1 H1 N1 B2 H2 N2, i(temp agez) j(parent) string
drop temp
sort parent agez

label var B1 "Early Period IRPs"
label var B2 "Late Period IRPs"
label var H1 "Early Period coef on HC"
label var H2 "Late Period coef on HC"

list agez parent B1 H1 N1 B2 H2 N2

gen spec=3

* Stack on top of the results already saved, then overwrite the file
qui compress
append using ${save_dir}/SE_irps_iv_v2.dta
save ${save_dir}/SE_irps_iv_v2.dta , replace
clear
log close






